import pandas as pd  
from sklearn.preprocessing import StandardScaler, LabelEncoder  
from sklearn.cluster import KMeans  
import pickle
  
# Load the dataset
credit_customers = pd.read_csv("credit_customers.csv")

# Take a copy of the clustering features so the encoding below does not
# touch the raw columns, which are still needed for the filters further down.
important_columns = ['credit_history', 'age', 'employment', 'credit_amount', 'savings_status']
data_for_clustering = credit_customers[important_columns].copy()

# Preprocess the data: the two category columns below become integer codes,
# while credit_history is one-hot encoded (first level dropped).
data_for_clustering['savings_status'] = LabelEncoder().fit_transform(data_for_clustering['savings_status'])
data_for_clustering['employment'] = LabelEncoder().fit_transform(data_for_clustering['employment'])
data_for_clustering = pd.get_dummies(data_for_clustering, columns=['credit_history'], drop_first=True)

# Normalize the data so that no single feature dominates the distance metric
data_for_clustering_scaled = StandardScaler().fit_transform(data_for_clustering)

# Perform K-means clustering; random_state is fixed so reruns are repeatable
kmeans = KMeans(n_clusters=4, random_state=42)
cluster_labels = kmeans.fit_predict(data_for_clustering_scaled)

# Add the cluster labels to the original (un-encoded) dataset
credit_customers['cluster'] = cluster_labels

# Build the row filters on the raw, un-encoded columns
good_credit_history = credit_customers['credit_history'].isin(['existing paid', 'all paid'])
age_group = (credit_customers['age'] >= 25) & (credit_customers['age'] <= 45)
stable_employment = credit_customers['employment'].isin(['>=7', '4<=X<7'])

# Keep only the customers that satisfy all three criteria
target_customers = credit_customers[good_credit_history & age_group & stable_employment]

# Distinct cluster ids present among the target customers, in order of
# first appearance
target_customer_segments = target_customers['cluster'].unique().tolist()

# Report the list of target customer segments
print("target_customer_segments:\n", target_customer_segments)

# Persist the result. The context manager guarantees the file handle is
# flushed and closed even if pickling raises, instead of leaking it.
with open("./ref_result/target_customer_segments.pkl", "wb") as output_file:
    pickle.dump(target_customer_segments, output_file)
